R Markdown

library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.1 ─
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(leaflet)
library(p8105.datasets)

# Default figure sizing applied to every knitted chunk.
knitr::opts_chunk$set(fig.width = 6, fig.asp = .6, out.width = "90%")

# Minimal ggplot theme with the legend placed underneath the plot.
theme_set(theme_minimal() + theme(legend.position = "bottom"))

# Continuous colour and fill scales default to viridis.
options(
  ggplot2.continuous.colour = "viridis",
  ggplot2.continuous.fill = "viridis"
)

# Shadow the discrete scale constructors with their viridis counterparts.
scale_colour_discrete <- scale_colour_viridis_d
scale_fill_discrete <- scale_fill_viridis_d

Load a dataset

# Tidy the raw listings: use `borough` as the name for the neighbourhood group
# and add `stars`, which is the location review score halved.
nyc_airbnb <-
  nyc_airbnb %>%
  rename(borough = neighbourhood_group) %>%
  mutate(stars = review_scores_location / 2)

Let's try to count things

# Cross-tabulate listings: one row per borough, one column per room type,
# each cell holding the number of listings.
count(nyc_airbnb, borough, room_type) %>%
  pivot_wider(names_from = room_type, values_from = n)
## # A tibble: 5 × 4
##   borough       `Entire home/apt` `Private room` `Shared room`
##   <chr>                     <int>          <int>         <int>
## 1 Bronx                       192            429            28
## 2 Brooklyn                   7427           9000           383
## 3 Manhattan                 10814           7812           586
## 4 Queens                     1388           2241           192
## 5 Staten Island               116            144             1
  #janitor::tabyl(borough,room_type)
# Scatter plot of every listing's coordinates, coloured by borough.
# NOTE(review): this plot maps `long` to x and `lat` to y, while the
# price-vs-location plot in this file maps them the other way round —
# confirm which orientation matches the data.
nyc_airbnb %>%
  ggplot(aes(x = long, y = lat, color = borough)) +
  geom_point(size = 0.1)

# Colour scale for the map markers, with its domain set to the `stars` column.
pal <- colorNumeric(
  palette = "viridis",
  domain = nyc_airbnb$stars
)

# Interactive map of listings that have a star rating; clicking a marker shows
# price, rating and review count.
nyc_airbnb %>%
  # na.omit(stars) ignores its extra argument and drops rows with an NA in
  # *any* column; drop_na(stars) removes only rows with a missing rating.
  drop_na(stars) %>%
  mutate(
    click_label = str_c(
      "<b>$", price, "</b><br>", stars, " stars<br>",
      number_of_reviews, " reviews"
    )
  ) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  # NOTE(review): addCircleMarkers() takes (lng, lat) positionally, so the
  # original call bound the `lat` column to lng and `long` to lat. That binding
  # is kept here, spelled out by name; it is only right if the dataset's
  # columns are swapped — confirm against the data.
  addCircleMarkers(
    lng = ~lat,
    lat = ~long,
    radius = .1,
    color = ~pal(stars),
    popup = ~click_label
  )

Let’s look at price and room type

# Price against star rating, one panel per room type.
ggplot(nyc_airbnb, aes(x = stars, y = price)) +
  geom_point() +
  facet_grid(. ~ room_type)
## Warning: Removed 10037 rows containing missing values (geom_point).

let’s look at price and neighborhood

# Mean listing price per Manhattan neighbourhood, cheapest first.
filter(nyc_airbnb, borough == "Manhattan") %>%
  group_by(neighbourhood) %>%
  summarize(mean_price = mean(price, na.rm = TRUE)) %>%
  arrange(mean_price)
## # A tibble: 32 × 2
##    neighbourhood       mean_price
##    <chr>                    <dbl>
##  1 Marble Hill               83.6
##  2 Inwood                    86.5
##  3 Washington Heights        90.1
##  4 Morningside Heights      107. 
##  5 Harlem                   117. 
##  6 Roosevelt Island         122. 
##  7 Two Bridges              123. 
##  8 East Harlem              128. 
##  9 Chinatown                158. 
## 10 Upper East Side          172. 
## # … with 22 more rows
# Price distribution per Manhattan neighbourhood (listings priced at most
# 1000), split by room type. Neighbourhoods are ordered by price via
# fct_reorder(), and the axes are flipped so the names sit on the vertical axis.
nyc_airbnb %>%
  filter(borough == "Manhattan", price <= 1000) %>%
  mutate(neighbourhood = fct_reorder(neighbourhood, price)) %>%
  ggplot(aes(x = neighbourhood, y = price)) +
  geom_boxplot() +
  coord_flip() +
  facet_grid(. ~ room_type)

获得地铁站的经纬度,结合现有数据,进行爱彼迎公寓距离地铁距离的分析

price vs location

# Random sample of 5000 listings under $500, plotted by coordinate and
# coloured by price.
# NOTE(review): `lat` is on x and `long` on y here, the reverse of the borough
# scatter plot in this file — confirm which orientation matches the data.
nyc_airbnb %>%
  filter(price < 500) %>%
  sample_n(5000) %>%
  ggplot(aes(x = lat, y = long, color = price)) +
  geom_point(size = 0.1)

retry leaflet

# Palette with an open domain (NULL): the colour range is derived from the
# values passed to pal() when the markers are drawn.
pal <- colorNumeric("viridis", NULL)

# Interactive map of a random sample of 1000 listings under $500, coloured by
# price, with a popup showing each listing's name and price.
nyc_airbnb %>%
  filter(price < 500) %>%
  sample_n(1000) %>%
  # Build the popup text after filtering and sampling so that each label
  # describes its own row. A label vector computed from the full data set
  # would not line up with the sampled rows.
  mutate(label = paste0("<b>", name, "</b><br>$", price)) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  # NOTE(review): addCircleMarkers() takes (lng, lat) positionally, so the
  # original call bound the `lat` column to lng and `long` to lat. That binding
  # is kept here, spelled out by name; it is only right if the dataset's
  # columns are swapped — confirm against the data.
  addCircleMarkers(
    lng = ~lat,
    lat = ~long,
    radius = 1,
    color = ~pal(price),
    popup = ~label
  )